/*******************************************************************************
PREPARATION ON IV SAMPLE

Data 	: 
Folder 	: 
Date	: 2018-03-10

Creator		: Jonas Cederlof	(JC)
Description : 
Notes:


LATEST UPDATE: 	

*/
********************************************************************************

* ---- Session setup -----------------------------------------------------------
clear
set more off
capture log close _all

*log using 	"../log/E1_prep_dejure_data.log" 	, replace 
use "$datapath/A3_gen_empmark.dta"


* Keep notifications dated 2005m1 through 2016m12 (both ends included).
* inrange() is 0 for a missing notdate_def, so those rows are removed too.
drop if !inrange(notdate_def, ym(2005,1), ym(2016,12))

* Remove small notifications (antvars below 5). A missing antvars does not
* compare as below 5 in Stata, so such rows are retained.
keep if !(antvars < 5)

* Author's rationale: when firm characteristics are missing from 2004 onwards
* the firm is absent from the data that year; it has gone bankrupt and the
* notification date is known to be wrong.
* The next two commands are diagnostics only; they do not modify the data.
summarize atfirm if year >= 2004 & firm_avg_earnings == .
count if year >= 2004 & firm_avg_earnings == . & eventtime_def == 0

* fastmax is a user-written command that is not defined in this file;
* presumably it stores the within-lopnr maximum of missingfirminRAMS_def in
* `temp' -- TODO confirm. Every lopnr flagged with temp==1 is then dropped.
fastmax missingfirminRAMS_def, by(lopnr) name(temp)
drop if temp == 1
drop temp


{ // Generate variables
*===============================================================================
* Everything up to the matching -restore- works on the eventtime_def==0 rows
* only; the full data set is put aside with -preserve-.
preserve
	keep if eventtime_def == 0

	// Ten-year age bracket dummies: [10,19.99], [20,29.99], ..., [80,89.99]
	foreach a of numlist 10(10)80 {
		generate age_brack`a' = inrange(age_def, `a', `a'+10-0.01)
	}
	// Share of each age bracket within a notification (varselid)
	foreach a of numlist 10(10)80 {
		bysort varselid: egen share_age_brack`a' = mean(age_brack`a')
	}

	* Same brackets, restricted to white-collar (WC==1) workers
	foreach a of numlist 10(10)80 {
		generate byte WCage_brack`a' = age_brack`a' == 1 & WC == 1
	}
	foreach a of numlist 10(10)80 {
		bysort varselid: egen WCshare_age_brack`a' = mean(WCage_brack`a')
	}

	* Count of WC workers aged 55 or more in the notification
	generate above55 = WC == 1 & age_def != . & age_def >= 55
	bysort varselid: egen N_col_above55 = total(above55)

	* Number of colleagues above 55 (own observation removed) and an
	* indicator for having at least one such colleague
	generate Nm1_col_above55 = N_col_above55 - above55
	generate col_above55 = Nm1_col_above55 > 0

	* Indicator: the notification contains a WC worker aged 52-58.
	* inrange() is 0 for a missing age, so no separate missing check is needed.
	bysort varselid: egen bw5258 = max(inrange(age_def, 52, 58) & WC == 1)
	label variable bw5258 "Indicator for notification having a WC worker in age 52-58"

	* Share of WC workers above 55 within the displacement:
	*   shareabove55     - denominator is the row count per varselid
	*   shareabove55_2   - denominator is antvars
	*   shareabove55_LOM - numerator leaves the worker himself out
	bysort varselid: egen N_total_vars = total(1)
	*compare N_total_vars antvars
	generate shareabove55 = N_col_above55/N_total_vars
	generate shareabove55_2 = N_col_above55/antvars
	compare shareabove55 shareabove55_2
	generate shareabove55_LOM = Nm1_col_above55/N_total_vars

	* Average layoff cost of notified WC workers above 55. The mean is only
	* filled in on above55==1 rows; fastmax (user-written, not defined in this
	* file) is then used to carry the cost to every row of the varselid --
	* presumably as a group maximum, TODO confirm.
	bysort varselid: egen avg_WCwage_tm1 = mean(manl_prenot_def) if above55 == 1
	generate xWCabove55_cost = shareabove55*avg_WCwage_tm1*(6/12)
	fastmax xWCabove55_cost, by(varselid) name(WCabove55_cost)
	drop xWCabove55_cost
	
	{ // Generate De jure notification time (DJNT)
	*===============================================================================
	* de_jure_NT_age is first filled on a 30/60/.../365 scale (presumably days --
	* TODO confirm) and then divided by 30, so the final unit is 30-day periods.
	*
	* Rules applied, in order (later replaces overwrite earlier ones):
	*   1. startdate3 >= 1997m1 : step function of tenure at notification
	*      (24-month steps, capped at 180 from 120 months of tenure).
	*   2. WC workers aged 55+ with at least 10 years of tenure: 365.
	*   3. startdate3 <  1997m1 : step function of age (5-year steps, capped
	*      at 180 from age 45).
	*
	* NOTE(review): rule 3 runs after rule 2, so a WC worker aged 55+ with
	* startdate3 before 1997m1 ends up with 180, not 365 -- confirm intended.
	* NOTE(review): a missing startdate3 satisfies startdate3>=ym(1997,1) in
	* Stata and is therefore handled by rule 1 -- confirm intended.

	gen 	de_jure_NT_age = .
	replace de_jure_NT_age = 30 if inrange(tenureatnot,0,23) & startdate3>=ym(1997,1)
	replace de_jure_NT_age = 60 if inrange(tenureatnot,24,47) & startdate3>=ym(1997,1)
	replace de_jure_NT_age = 90 if inrange(tenureatnot,48,71) & startdate3>=ym(1997,1)
	replace de_jure_NT_age = 120 if inrange(tenureatnot,72,95) & startdate3>=ym(1997,1)
	replace de_jure_NT_age = 150 if inrange(tenureatnot,96,119) & startdate3>=ym(1997,1)
	replace de_jure_NT_age = 180 if tenureatnot>=120 & tenureatnot!=. & startdate3>=ym(1997,1)
	* FIX: missing values compare as larger than any number, so age_def>=55 was
	* true for a missing age. The explicit age_def!=. guard (already used for
	* above55, bw5258 and the age>=45 rule) keeps missing ages out of this rule.
	replace de_jure_NT_age = 365 if age_def>=55 & age_def!=. & tenureatnot>=10*12 & tenureatnot!=. & WC==1

	replace de_jure_NT_age = 30 if inrange(age_def,0,24.99) & startdate3<ym(1997,1)
	replace de_jure_NT_age = 60 if inrange(age_def,25,29.99) & startdate3<ym(1997,1)
	replace de_jure_NT_age = 90 if inrange(age_def,30,34.99) & startdate3<ym(1997,1)
	replace de_jure_NT_age = 120 if inrange(age_def,35,39.99) & startdate3<ym(1997,1)
	replace de_jure_NT_age = 150 if inrange(age_def,40,44.99) & startdate3<ym(1997,1)
	replace de_jure_NT_age = 180 if age_def>=45 & startdate3<ym(1997,1) & age_def!=.

	* Rescale to 30-day periods
	replace de_jure_NT_age = de_jure_NT_age/30
	* Diagnostics: how many rows got no value, and how many lack tenure
	count if de_jure_NT_age==. 	
	count if tenureatnot==. 	

	}
	*
	
	* Average layoff cost over all notified workers (blue-collar included):
	* mean pre-notification monthly wage times mean de jure notification time.
	bysort varselid: egen avg_wage_tm1 = mean(manl_prenot_def)
	bysort varselid: egen avg_de_jure_NT_age = mean(de_jure_NT_age)
	generate dejure_layoffcost_perworker = avg_wage_tm1*avg_de_jure_NT_age
	* NOTE(review): despite its name this sums de_jure_NT_age/12 only; no wage
	* term enters the total -- confirm that this is intended.
	bysort varselid: egen total_dejure_layoffcost = total(de_jure_NT_age/12)

	* Notification-level means: tenure, age, share female, pre-notification
	* earnings
	bysort varselid: egen avg_tenure_layoff = mean(tenureatnot)
	bysort varselid: egen avg_age_layoff = mean(age_def)
	bysort varselid: egen share_female_layoff = mean(female)
	bysort varselid: egen avg_earnings_layoff = mean(annual_ear_prenot_def)

	* Tenure brackets: 1-5 are consecutive 24-month bands (0-23, 24-47, ...,
	* 96-119); 6 is 120 months or more. Missing tenure stays missing.
	generate tenurebracket = .
	forvalues k = 1/5 {
		replace tenurebracket = `k' if inrange(tenureatnot, 24*(`k'-1), 24*`k'-1)
	}
	replace tenurebracket = 6 if tenureatnot >= 120 & tenureatnot != .

	* One dummy per bracket and its share within the notification.
	* The loop below expects tabulate to have produced all six dummies.
	quietly tabulate tenurebracket, generate(D_tenurebracket)
	forvalues j = 1/6 {
		bysort varselid: egen share_within_tenurebracket`j' = mean(D_tenurebracket`j')
	}

	* Keep the merge keys plus the variables built above
	keep	lopnr persid ///
		col_above55 shareabove55* bw5258 above55 ///
		N_total_vars N_col_above55 ///
		share_age_brack* WCshare_age_brack* ///
		WCabove55_cost avg_WCwage_tm1 ///
		de_jure_NT_age avg_de_jure_NT_age ///
		dejure_layoffcost_perworker total_dejure_layoffcost ///
		avg_age_layoff avg_tenure_layoff avg_earnings_layoff ///
		share_female_layoff *tenurebracket*

	tempfile temp
	save `temp'
restore
* Attach the eventtime_def==0 values to every row of the same lopnr/persid
merge m:1 lopnr persid using `temp'
drop _merge
}
*


{ // Generate SNI codes
*===============================================================================
* Builds a letter-level industry code (SNI_1dig), its dummies (SNItemp*) and a
* two-digit code (SNI_2dig) from the eventtime_def==0 rows, then merges them
* back on lopnr/persid.
preserve
	keep if eventtime_def==0

	*Generate: two digit and letter SNI
	gen sni02 = substr(astsni02,1,2)
	gen sni07 = substr(astsni07,1,2)

	destring sni02 sni07, replace

	*If both sni02 and sni07 exists, let sni07 have priority
	replace sni02=. if sni07!=.
	* A two-digit code of 0 is treated as missing
	replace sni02=. if sni02==0
	replace sni07=. if sni07==0

	*Follows the letters of SNI07 and sometimes disaggregates to SNI02.
	* Corrections to the sni02 side of the mapping:
	*  - C : manufacturing divisions in SNI2002 run beyond 33; 34-36 (motor
	*        vehicles, other transport equipment, furniture/other) are now
	*        included instead of being left with an empty letter.
	*  - DE: the range was written inrange(sni02,40,21), which is never true
	*        (lower bound above upper bound). Electricity/gas/water supply is
	*        40-41; recycling (37) maps to the SNI2007 letter E as well.
	*  - O : public administration is SNI2002 division 75, not 74. Division 74
	*        (other business activities) now stays in LMN.
	gen str	SNI_1dig = "" 
	replace SNI_1dig = "A" 	if inrange(sni02,1,5) | inrange(sni07,1,3)
	replace SNI_1dig = "B"  if inrange(sni02,10,14) | inrange(sni07,5,9)
	replace SNI_1dig = "C"  if inrange(sni02,15,36) | inrange(sni07,10,33)
	replace SNI_1dig = "DE" if inrange(sni02,37,37) | inrange(sni02,40,41) | inrange(sni07,35,39)
	replace SNI_1dig = "F"  if inrange(sni02,45,45) | inrange(sni07,41,43)
	replace SNI_1dig = "G"  if inrange(sni02,50,52) | inrange(sni07,45,47)
	replace SNI_1dig = "I"  if inrange(sni02,55,55) | inrange(sni07,55,56)
	replace SNI_1dig = "HJ" if inrange(sni02,60,64) | inrange(sni07,49,53) | inrange(sni07,58,63)
	replace SNI_1dig = "K"  if inrange(sni02,65,67) | inrange(sni07,64,66)
	replace SNI_1dig = "LMN" if inrange(sni02,70,74) | inrange(sni07,68,82)
	replace SNI_1dig = "O"  if inrange(sni02,75,75) | inrange(sni07,84,84)
	replace SNI_1dig = "P"  if inrange(sni02,80,80) | inrange(sni07,85,85)
	replace SNI_1dig = "Q"  if inrange(sni02,85,85) | inrange(sni07,86,88)
	replace SNI_1dig = "RS" if inrange(sni02,90,93) | inrange(sni07,90,96)
	replace SNI_1dig = "T"  if inrange(sni02,95,95) | inrange(sni07,97,98)
	replace SNI_1dig = "U"  if inrange(sni02,99,99) | inrange(sni07,99,99)

	* One dummy per letter group; rows with an empty SNI_1dig get missing
	* dummies because tabulate skips empty strings.
	qui tab SNI_1dig, gen(SNItemp)
	* Two-digit code: sni07 where available, otherwise sni02 shifted by 1000
	* (pre-2007 rows only) so the two classifications cannot collide.
	gen 	SNI_2dig = sni07
	replace SNI_2dig = sni02 + 1000 if SNI_2dig==. & year<2007

	keep SNI* lopnr persid
	tempfile temp
	save 	`temp'
restore
merge m:1  lopnr persid using `temp'
drop _merge
}
*

{ // Generate variables
*===============================================================================
* Mean age of the displaced workers in a notification. The mean is computed on
* the eventtime_def==0 rows only; fastmax (user-written, not defined in this
* file) then carries it to all rows of the varselid -- presumably as a group
* maximum, TODO confirm. The helper xavg_age_not is left in the data.
bysort varselid: egen xavg_age_not = mean(age_def) if eventtime_def == 0
fastmax xavg_age_not, name(avg_age_not) by(varselid)
generate sq_avg_age_not = avg_age_not^2

* Square of the mean age in the notifying firm
generate q_firm_age_RAMS = firm_avg_age^2

* White-collar status at notification: 1 on every row of a lopnr that has
* WC==1 on an eventtime_def==0 row, 0 otherwise
bysort lopnr (date): egen WC_at_not = max(WC == 1 & eventtime_def == 0)
}
*

* Firm-level large sample, written before the observation drops further down
save "../../../../../data2/cedsei/E1_largesample_firm.dta", replace


{ // Dropping observations
*===============================================================================
* Remove rows where the running variable (age) is missing
* (author's reminder, 2021-06-22: move this further down!!)
drop if age_def == .

* Remove negative notification times: only rows with inkom_dat strictly before
* anstupp_dat survive. A missing anstupp_dat compares as larger than any date,
* so those rows are kept as long as inkom_dat is non-missing.
drop if !(inkom_dat < anstupp_dat)

* Attach value labels to the notification reason, then drop bankruptcies
* (code 5, "Konk"). -capture- tolerates the label already being defined.
capture label define orsak 1 "Anna" 2 "Effe" 3 "Flve" 4 "L*ge" 5 "Konk" 6 "Nedl"
label values varselorsak orsak
drop if varselorsak == 5
}
*


{ // Generate variables
*===============================================================================

* Running variable: age centred at 55, and the treatment indicator.
* N_above55 is 1 only strictly above the cutoff and within [-3,3].
* NOTE(review): the cutoff is strict (>0) here while earlier sections of this
* file use age_def>=55 -- confirm that age exactly 55 should count as untreated.
generate N_age_def = age_def - 55
generate N_above55 = inrange(N_age_def, -3, 3) & N_age_def > 0

* Three-year age brackets interacted with the running variable.
* The suffix is the bracket's lower age bound: ageFE_16, ageFE_19, ..., ageFE_79.
forvalues a = 16(3)79 {
	local lo = `a' - 55
	generate ageFE_`a' = inrange(N_age_def, `lo', `lo'+3-0.01)*N_age_def
}
* Boundary handling: age exactly 58 is moved into the 55 bracket, and ages
* below 16 are folded into the lowest bracket.
replace ageFE_55 = N_age_def if N_age_def == 3
replace ageFE_58 = 0 if N_age_def == 3
replace ageFE_16 = N_age_def if N_age_def < -39

* Six-year age bracket fixed effects: ageCUT_16, ageCUT_22, ..., ageCUT_82.
forvalues b = 16(6)82 {
	local lo = `b' - 55
	generate byte ageCUT_`b' = inrange(N_age_def, `lo', `lo'+6-0.01)
}

* Lowest bracket also takes everything below it; age exactly 58 is moved
* from the 58 bracket into the 52 bracket.
replace ageCUT_16 = 1 if N_age_def < -35
replace ageCUT_52 = 1 if age_def == 58
replace ageCUT_58 = 0 if age_def == 58

}
*




// Normalize control variables
*===============================================================================
* Each control is centred on its sample mean (computed after the drops above)
* and stored as C_<name>; the original variable is left untouched.
local controls annual_ear_prenot_def female immigrant tenureatnot ///
	educ0 educ1 educ2 educ3 educ4
foreach v of varlist `controls' {
	quietly summarize `v'
	generate C_`v' = `v' - r(mean)
}
*


* Shrink storage types where possible and write the final large sample
compress
save "$datapath/E1_largesample.dta", replace


